%config Completer.use_jedi = False #for autocompletion feature

import pandas as pd
import pandas_datareader.data as web
import numpy as np
import statsmodels.api as sm
import scipy.stats as scs
import yfinance as yf
import sklearn.mixture as mix
from pytrends.request import TrendReq
import time
import datetime

import matplotlib as mpl
from matplotlib import cm
import matplotlib.pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator
%matplotlib inline


from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import plotly
import plotly.io as pio
pio.templates.default = "none"

from tqdm import tqdm
import ssl
# NOTE(review): security - this swaps the ssl module's default HTTPS context
# factory for the unverified one, so certificate checks are skipped for every
# later request in this process that relies on that default (not only the
# Wikipedia fetch below). Confirm this workaround is still required.
ssl._create_default_https_context = ssl._create_unverified_context
# Fetch the HTML tables found on the Wikipedia page; the first one is kept as
# the S&P 500 constituents table.
table=pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
df_SP500 = table[0]
# Preview of the first rows (only rendered when run as the last line of a notebook cell).
df_SP500.head()
# Output of df_SP500.head():
# Symbol Security SEC filings GICS Sector GICS Sub-Industry Headquarters Location Date first added CIK Founded
# 0 MMM 3M reports Industrials Industrial Conglomerates Saint Paul, Minnesota 1976-08-09 66740 1902
# 1 ABT Abbott Laboratories reports Health Care Health Care Equipment North Chicago, Illinois 1964-03-31 1800 1888
# 2 ABBV AbbVie reports Health Care Pharmaceuticals North Chicago, Illinois 2012-12-31 1551152 2013 (1888)
# 3 ABMD Abiomed reports Health Care Health Care Equipment Danvers, Massachusetts 2018-05-31 815094 1981
# 4 ACN Accenture reports Information Technology IT Consulting & Other Services Dublin, Ireland 2011-07-06 1467373 1989
# Ticker symbol -> short display name used in the plot legends.
label_dictionary = {
    'KO': 'Cola',
    'PEP': 'Pepsi',
    'LEG': 'Leggett',
    'HD': 'Home Depot',
}

# Requested history window: 1 Jan 1980 up to the moment this runs.
start = datetime.datetime(1980, 1, 1)
end = datetime.datetime.today()

factors = list(label_dictionary)

# Ticker symbol -> DataFrame built from whatever yf.download returned.
factordict = {}

for f in tqdm(factors):
    try:
        factordict[f] = pd.DataFrame(
            yf.download(f, start=start, end=end, progress=False))
    except Exception as exc:
        # Best effort: one failed ticker must not abort the others, but report
        # it so the missing key in factordict is not a surprise later on.
        # (KeyboardInterrupt/SystemExit are deliberately not caught.)
        tqdm.write('Download failed for {}: {!r}'.format(f, exc))
# 100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:08<00:00,  2.20s/it]
import plotly.graph_objs as go


# Single figure overlaying the cumulative log return of every downloaded ticker.
fig = go.Figure()

# First available date per ticker. Built from factordict (the tickers that were
# actually downloaded) rather than factors, so a ticker skipped by the download
# loop does not raise KeyError; frames without rows are left out.
dates = [factordict[f].index[0] for f in factordict if len(factordict[f].index)]
start_date = '1980-01-01' #max(dates)
for f in factordict:
    # Keep only rows strictly after start_date.
    factor = factordict[f][(factordict[f].index > start_date)]
    if factor.empty:
        # Nothing to normalise against; values[0] below would raise IndexError.
        continue
    adj_close = factor['Adj Close'].values
    fig.add_trace(go.Scattergl(
        x=factor.index,
        # Log of price relative to the first price in the window.
        y=np.log(adj_close / adj_close[0]),
        name=label_dictionary[f],
        mode="lines"))


fig.update_layout(title={'text': 'Stock comparison'})
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
# Adjusted close per ticker, one column each; rows where any ticker has a
# missing value are dropped.
dd = {ticker: factordict[ticker]['Adj Close'] for ticker in factordict}
df = pd.DataFrame(dd).dropna()
df.head()

# Same layout for the daily highs ...
dfH = pd.DataFrame(
    {ticker: factordict[ticker]['High'] for ticker in factordict}
).dropna()

# ... and for the daily lows.
dfL = pd.DataFrame(
    {ticker: factordict[ticker]['Low'] for ticker in factordict}
).dropna()
 
def calculateSpan(price):
    """Return, for every position, how far back the nearest smaller value lies.

    For position ``i`` the result is ``i - j`` where ``j`` is the closest
    earlier position with ``price[j] < price[i]``; when no earlier value is
    strictly smaller the result is ``i + 1``. Equivalently, it is the length
    of the run ending at ``i`` in which every value is ``>= price[i]``.

    Args:
        price: One-dimensional sequence of comparable values (list, NumPy
            array or pandas Series). Values are read by position, so a
            Series' index labels are ignored.

    Returns:
        A list of ints with the same length as ``price``; an empty list for
        empty input.
    """
    # Positional access on a plain array: indexing a label-indexed Series with
    # integers is a deprecated positional lookup in pandas and a KeyError on a
    # non-zero-based integer index.
    values = np.asarray(price)
    n = len(values)
    if n == 0:
        return []

    # The first element has nothing before it, so its span is 1.
    S = [1] * n
    # Stack of positions whose values are strictly increasing bottom to top.
    st = [0]

    for i in range(1, n):
        # Discard positions whose value is >= the current one; they can never
        # be the "nearest smaller value" for this or any later position.
        while st and values[st[-1]] >= values[i]:
            st.pop()

        # Empty stack: every earlier value is >= values[i]. Otherwise the top
        # of the stack is the nearest earlier strictly smaller value.
        S[i] = i + 1 if not st else i - st[-1]

        st.append(i)
    return S

def minmax_scale(df):
    """Rescale ``df`` linearly as ``(df - min) / (max - min)``.

    The minimum and maximum are taken with ``np.min`` / ``np.max``. There is
    no guard for constant input, where ``max - min`` is zero.
    """
    lowest = np.min(df)
    value_range = np.max(df) - lowest
    shifted = df - lowest
    return shifted / value_range

def z_scale(df):
    """Standardise ``df`` as ``(df - average) / std``.

    Uses ``np.average`` and ``np.std`` with their default arguments. There is
    no guard for constant input, where the standard deviation is zero.
    """
    centre = np.average(df)
    deviations = df - centre
    spread = np.std(df)
    return deviations / spread
# Daily-return threshold: returns >= gap count as "+ve", returns < -gap as
# "-ve"; anything in between is left out of the split traces.
gap = 0.001

from plotly.subplots import make_subplots
import plotly.graph_objects as go

# One six-row diagnostic figure per ticker (at most the first five columns).
for f in df.columns[:5]:
    var = f
    df_test = df[var]
    dH = dfH[var]
    dL = dfL[var]

    # Daily returns and the threshold masks, computed once per ticker instead
    # of once per trace argument.
    returns = df_test.pct_change()
    up = returns >= gap
    down = returns < -gap
    high_returns = dH.pct_change()
    high_up = high_returns >= gap
    low_returns = dL.pct_change()
    low_down = low_returns < -gap

    fig = make_subplots(rows=6, cols=1,
                        shared_xaxes=True,
                        vertical_spacing=0.02)

    # Row 1: log of price relative to the first price. .iloc[0] is an explicit
    # positional lookup; df_test[0] on a label-indexed Series is deprecated.
    fig.add_trace(go.Scattergl(
        x=df_test.index,
        y=np.log(df_test / df_test.iloc[0]),
        name=label_dictionary[var],
        mode="lines"), row=1, col=1)

    # Row 2: daily returns split by sign around the gap threshold.
    fig.add_trace(go.Scattergl(
        x=df_test.index[up],
        y=returns[up],
        name='+ve daily returns',
        mode="lines"), row=2, col=1)

    fig.add_trace(go.Scattergl(
        x=df_test.index[down],
        y=returns[down],
        name='-ve daily returns',
        mode="lines"), row=2, col=1)

    # Row 3: rolling standard deviation scaled by sqrt(21). For the filtered
    # traces the window is 21 *retained observations* (the mask is applied
    # before rolling), not 21 consecutive rows of the original series.
    fig.add_trace(go.Scattergl(
        x=df_test.index[up],
        y=returns[up].rolling(21).std() * 21 ** 0.5,
        name='21-day +ve volatility',
        mode="lines"), row=3, col=1)

    fig.add_trace(go.Scattergl(
        x=df_test.index[down],
        y=returns[down].rolling(21).std() * 21 ** 0.5,
        name='21-day -ve volatility',
        mode="lines"), row=3, col=1)

    fig.add_trace(go.Scattergl(
        x=dH.index[high_up],
        y=high_returns[high_up].rolling(21).std() * 21 ** 0.5,
        name='21-day +ve volatility H',
        mode="lines"), row=3, col=1)

    fig.add_trace(go.Scattergl(
        x=dL.index[low_down],
        y=low_returns[low_down].rolling(21).std() * 21 ** 0.5,
        name='21-day -ve volatility L',
        mode="lines"), row=3, col=1)

    fig.add_trace(go.Scattergl(
        x=df_test.index,
        y=returns.rolling(21).std() * 21 ** 0.5,
        name='21-day volatility',
        mode="lines"), row=3, col=1)

    # Row 4: drawdown relative to the running maximum.
    running_max = df_test.cummax()
    fig.add_trace(go.Scattergl(
        x=df_test.index,
        y=(df_test - running_max) / running_max,
        name='Drawdown',
        mode="lines"), row=4, col=1)

    # Disabled variant kept for reference:
    # fig.add_trace(go.Scattergl(
    #     x=df_test.index,
    #     y=(df_test-dH.cummax())/dL.cummax(),
    #     name='Drawdown modified',
    #     mode="lines"),row=4, col=1)

    # Row 5: span from calculateSpan. A plain array is passed so the span is
    # computed by position regardless of the Series' index.
    S = calculateSpan(df_test.values)

    fig.add_trace(go.Scattergl(
        x=df_test.index,
        y=S,
        name='Drawdown days',
        mode="lines"), row=5, col=1)

    # Row 6: product of the min-max scaled and z-scored span.
    fig.add_trace(go.Scattergl(
        x=df_test.index,
        y=minmax_scale(S) * z_scale(S),
        name='Drawdown days scaled',
        mode="lines"), row=6, col=1)

    fig.update_layout(height=800, width=1000,
                      title_text=var)
    # Rows 1-4 are ratios, shown as whole percentages.
    fig.layout.yaxis1.tickformat = ',.0%'
    fig.layout.yaxis2.tickformat = ',.0%'
    fig.layout.yaxis3.tickformat = ',.0%'
    fig.layout.yaxis4.tickformat = ',.0%'

    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.show()